STAR+WASP reduces reference bias in the allele-specific mapping of RNA-seq reads: Manuscript Figures
Loading Required Libraries
library(splitstackshape)
library(dplyr)
library(ggplot2)
library(VennDiagram)
library(RColorBrewer)
library(gridExtra)
library(cowplot)
library(knitr)
library(kableExtra)
library(tidyverse)
library(Hmisc)
library(ggridges)
library(venn)
library(ggthemes)
library(stringr)
library(splitstackshape)
library(chron)
library(magicfor)
library(ggtext)
library(reshape2)
library(plyr)
library(formattable)
library(data.table)
library(ggrepel)
require(MASS)
require(scales) Loading data extracted from alighnments and subsequent data calls
Reads Overlapping Variants and nReads per Sample
STAR_subset <- benchmark_rez_melted %>% filter(Run == "STAR")
df_orders <-
STAR_subset[order(
STAR_subset$Sample,
STAR_subset$Average_input_read_length,
STAR_subset$Number_of_input_reads
), ]
order_df <- as.character(rev(unique(df_orders$Sample)))
df_combined$state1 <-
ordered(
df_combined$state1 ,
levels = c(
"All Alignments",
"Alignment Failed WASP Filtering",
"Alignment Passed WASP Filtering"
)
)
global_colors <- c("darkorange3", "dodgerblue4", "firebrick4")
## Adding read lengths
read_lengths <- benchmark_rez_melted[, c(2, 7)]
read_lengths <- read_lengths[!duplicated(read_lengths$Sample),]
num_reads_reshaped_nreads <-
inner_join(num_reads_reshaped, read_lengths, by = "Sample")
num_reads_reshaped_nreads$Number_of_input_reads_mil <-
(num_reads_reshaped_nreads$Number_of_input_reads) / 1000000
(b <-
num_reads_reshaped_nreads[order(num_reads_reshaped_nreads$Flag, decreasing = T), ] %>% filter(Flag == "vW_Tagged Reads") %>%
ggplot() +
geom_bar(
aes(
x = factor(Sample, levels = order_df),
y = perc,
group = Sample,
fill = factor(Flag, levels = c("vW_Tagged Reads", "Reads with no Tag"))
),
stat = "identity",
width = 0.99,
alpha = 0.9,
color = "white"
) +
geom_text(
aes(
x = Sample,
y = perc,
label = paste0(round(perc, 1), "%")
),
size = 4,
position = position_stack(vjust = 0.5)
) +
geom_text(
aes(
x = Sample,
y = perc,
label = round(Number_of_input_reads_mil, 1)
),
size = 4,
position = position_stack(vjust = 1.016),
color = "gray40"
) +
theme_test(base_size = 13) +
scale_color_manual(values = c("gray60", "gray60")) +
scale_fill_manual(values = "gray70") +
labs(y = "Reads Overlapping Variants (% of Total)", x = "") +
theme(legend.title = element_blank(), legend.position = "none") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 10)) + scale_x_discrete(expand = c(0, 0))
) + theme(
axis.title.y = element_text(size = 12),
axis.text.x = element_text(
size = 12,
angle = 90,
hjust = 1
),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 7)
) Classificaiton of WASP-Rejected Reads
stats_vW_Tags$vW_Tag_desc <-
as.character(stats_vW_Tags$vW_Tag_desc)
stats_vW_Tags$vW_Tag_desc[stats_vW_Tags$vW_Tag_desc == "Variant Base in Read is N/non-ACGT"] <- "Variant Base in Read is N"
(d <-
stats_vW_Tags %>% filter(vW_Tag != 1) %>% filter(round != 0) %>%
ggplot() +
geom_bar(
aes(
x = factor(Sample, levels = order_df),
round(Freq, 3),
group = Sample,
fill = vW_Tag_desc
),
stat = "identity",
width = 0.99,
alpha = 0.8
) +
geom_text(
aes(
x = Sample,
y = Freq,
label = paste0(round(Freq, 2), "%")
),
size = 4,
position = position_stack(vjust = 0.5)
) +
scale_fill_manual(
values = c(
"Variant Base in Read is N" = "gray15",
"Remapped Read did not Map" = "tan2",
"Remapped Read Maps to Different Locus" =
"dodgerblue4",
"Read Overlaps too Many Variants" = "firebrick4"
)
) +
theme_test(base_size = 13) +
labs(y = "WASP-Rejected Reads (%)", x = "") +
theme(axis.text.x = element_text(
angle = 0,
hjust = 1,
vjust = 0.5,
size = 10
)) +
scale_y_continuous(expand = c(0.03, 0), limits = c(0, 4.5)) +
scale_x_discrete(expand = c(0, 0)) + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
)) +
theme(
legend.title = element_blank(),
legend.position = c(0.22, 0.8)
) +
guides(fill = guide_legend(nrow = 4, byrow = FALSE))
) + theme(
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 9)
)Reference Bias - Three-Way Comp
scale_y <- scales::trans_new(
"signed_log",
transform = function(x)
sign(x) * log1p(abs(x)),
inverse = function(x)
sign(x) * expm1(abs(x))
)
df_combined <- read.csv("df_combined.csv")
df_combined$state1 <-
ordered(
df_combined$state1 ,
levels = c(
"All Alignments",
"Alignment Failed WASP Filtering",
"Alignment Passed WASP Filtering"
)
)
df_combined$state2 <- df_combined$state1
df_combined$state2 <-
as.character(df_combined$state2)
df_combined$state2[df_combined$state2 == "All Alignments"] <-
"All Alignments"
df_combined$state2[df_combined$state2 == "Alignment Passed WASP Filtering"] <-
"Alignment Passed WASP Filtering"
df_combined$state2[df_combined$state2 == "Alignment Failed WASP Filtering"] <-
"Alignment Failed WASP Filtering"
df_combined$state2 <-
ordered(
df_combined$state2 ,
levels = c(
"All Alignments",
"Alignment Failed WASP Filtering",
"Alignment Passed WASP Filtering"
)
)
unique(df_combined$state2)## [1] Alignment Passed WASP Filtering Alignment Failed WASP Filtering
## [3] All Alignments
## 3 Levels: All Alignments < ... < Alignment Passed WASP Filtering
(c <- df_combined %>%
ggplot(aes(
x = state2,
y = diff,
fill = state2,
color = state2
)) +
geom_boxplot(alpha = 0.9, size = 0.2) + geom_point(size = 1) +
labs(y = paste0("Reference Bias (REF% - ALT%)"), x = "") +
geom_vline(xintercept = 0) +
geom_hline(
yintercept = 0,
color = "gray30",
linetype = "dashed",
size = 0.3
) +
scale_color_manual(values = c("gray40", "firebrick4", "steelblue4")) +
scale_fill_manual(values = c("gray40", "firebrick4", "steelblue4")) +
theme_light(base_size = 12) +
scale_y_continuous(
trans = scale_y,
breaks = c(-5, 0, 5, 10, 15, 20, 50, 60, 70, 80, 90, 100),
limits = c(-5, 100),
labels = c(-5, "0", "5", "10", "", "", "50", "", "70", "", "", "100")
) +
annotation_logticks(
sides = "l",
outside = F,
short = unit(0.05, "cm"),
mid = unit(0.05, "cm"),
long = unit(0.2, "cm")
) +
theme(legend.position = "right", legend.title = element_blank()) +
theme(
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12),
axis.ticks.x = element_blank(),
legend.text = element_text(size = 9)
) + theme(
legend.title = element_blank(),
legend.position = c(0.4, 0.15),
legend.background = element_rect(fill = alpha('white', 0))
)
)Reference Bias - Two-Way Comp
(f <-
df_combined %>% filter(
state1 == "All Alignments" |
state1 == "Alignment Passed WASP Filtering"
) %>%
ggplot(aes(
x = state1, y = diff, fill = state1
)) +
geom_boxplot(alpha = 0.9, size = 0.3) +
geom_point(size = 1, color = "gray20") +
labs(
y = paste0("Reference Bias", "\n", "(REF% - ALT%)"),
x = ""
) +
scale_color_manual(values = c("firebrick4", "steelblue4")) +
scale_fill_manual(values = c("firebrick4", "steelblue4")) +
geom_hline(
yintercept = 0,
color = "gray20",
linetype = "dashed",
size = 0.4
) +
theme_test(base_size = 13) +
scale_y_continuous(limits = c(-5, 10)) +
theme(
legend.position = c(0.45, 0.15),
legend.title = element_blank()
) +
scale_x_discrete(
labels = function(x)
str_wrap(x, width = 12)
) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
) + theme(axis.title.y = element_text(size =12), axis.text.x = element_text(size =8), axis.text.y = element_text(size =8), legend.text = element_text(size =7))Mapping speed for STAR, WASP and STAR+WASP
(e <- wall_clock_melted_converted %>% filter(Threads == "8 Threads") %>%
ggplot(aes(x = factor(Sample, levels=order_df), y = reads_per_hour/1000000, group=Run)) +
geom_point(aes(color=factor(Run),shape=factor(Run),fill=factor(Run)), size=3)+
scale_shape_manual(values=c(3, 17, 16))+
scale_color_manual(values=global_colors)+
labs(y = "Million Reads/Hour", x="")+
theme_light(base_size = 12) + theme(legend.title = element_blank(), legend.position = c(0.76,0.15),
legend.background = element_rect(fill=alpha('white', 0)))+
theme(strip.background =element_rect(fill="white", colour = "white"))+
theme(strip.text = element_text(colour = 'black'), strip.text.x = element_markdown(hjust = 0.5, size=1)) +
theme(axis.text.x = element_text(angle = 90,hjust = 1,vjust = 0.5, size=10)) +
scale_y_log10(breaks=c(0,10,20,30,40,50,60,70,80,90, 100,200,300,400,500,600,700,800,900,1000), limits=c(10, 1000), labels=c("","10","","","","","","","","","100","","","","","","","","","1000"))+
annotation_logticks(sides = "l", outside = F, short = unit(0.05, "cm"),mid = unit(0.05, "cm"),long = unit(0.2, "cm"))+
coord_cartesian(clip = "off")) + theme(axis.title.y = element_text(size =12), axis.text.x = element_text(size =8), axis.text.y = element_text(size =8), legend.text = element_text(size =7))Figure 1 Panel (Main)
b <- b + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
(main <-
plot_grid(
b,
c,
d,
e,
align = c("v", "h"),
rel_widths = c(2.1, 1.1),
rel_heights = c(1, 1.35),
labels = c("(b)", "(d)", "(c)", "(e)")
))Supplementary and Other Figures
Number of Reads per Sample
intro_order <- c(
"Total_R1",
"Total_R2",
"PolyA_R1",
"PolyA_R2",
"Nucleus_PolyA_R1",
"Nucleus_PolyA_R2",
"Nucleus_nonPolyA_R1",
"Nucleus_nonPolyA_R2",
"HG00733",
"NA19239",
"HG00732",
"HG00512",
"NA19238",
"NA19240",
"HG00513",
"HG00731"
)
num_input_reads_per_sample <- read.csv("num_input_reads_per_sample_supp.csv")
num_input_reads_per_sample %>%
ggplot(aes(
reorder(
x = factor(Sample, levels = intro_order),
-Number_of_input_reads_initial / 1000000
),
y = Number_of_input_reads_initial / 1000000
)) +
geom_bar(stat = "identity", fill = "gray65") +
theme_test(base_size = 12) +
scale_y_continuous(expand = c(0, 0), limits = c(
0,
max(num_input_reads_per_sample$Number_of_input_reads_initial) / 1000000 +
1
)) +
scale_x_discrete(expand = c(0, 0)) +
labs(y = "Number of Input Reads (Million)", x = "") +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
))Reference bias for all reads vs. reads that pass filtering for each sample
df_combined <- read.csv("df_combined.csv")
df_combined_sub <-
df_combined %>% filter(state1 == "All Alignments" |
state1 == "Alignment Passed WASP Filtering")
unique(df_combined_sub$state1)## [1] "Alignment Passed WASP Filtering" "All Alignments"
df_combined$state1 <-
ordered(
df_combined$state1 ,
levels = c("All Alignments", "Alignment Passed WASP Filtering")
)
df_combined_sub <- df_combined_sub[, c(2, 5, 7)]
df_combined_sub_reshaped <-
reshape(
df_combined_sub,
idvar = "Sample",
timevar = "state1",
direction = "wide"
)
colnames(df_combined_sub_reshaped) <-
c("Sample", "REF-ALT After Filtering", "REF-ALT Unfiltered")
df_combined_sub_reshaped %>%
ggplot(aes(x = `REF-ALT Unfiltered`, y = `REF-ALT After Filtering`, color =
Sample)) + geom_point(size = 2.5) + scale_color_manual(
values = c(
"#67000D",
"#A50F15",
"#CB181D",
"#EF3B2C",
"#A63603",
"#FD8D3C",
"#FDD0A2",
"#FDAE6B",
"#08519C",
"#2171B5",
"#4292C6",
"#6BAED6",
"#006D2C",
"#238B45",
"#41AB5D",
"#74C476"
)
) + scale_x_continuous(limits = c(0, 10)) + scale_y_continuous(limits = c(0, 10)) + theme_cowplot()Mapping speed for STAR, WASP and STAR+WASP - 8 threads
wall_clock_melted_converted <- read.csv("wall_clock_melted_converted.csv")
global_colors <- c("darkorange3", "dodgerblue4", "firebrick4")
#Million Reads/Hour 8threads
(threads8 <-
wall_clock_melted_converted %>% filter(Threads == "16 Threads") %>%
ggplot(aes(
x = factor(Sample, levels = order_df),
y = reads_per_hour / 1000000,
group = Run
)) +
geom_point(aes(
color = factor(Run),
shape = factor(Run),
fill = factor(Run)
), size = 3) +
scale_shape_manual(values = c(3, 17, 16)) +
scale_color_manual(values = global_colors) +
labs(y = "Million Reads/Hour", x = "") +
theme_light(base_size = 12) + theme(legend.title = element_blank(), legend.position = "none") +
theme(strip.background = element_rect(
fill = "white", colour = "white"
)) +
theme(
strip.text = element_text(colour = 'black'),
strip.text.x = element_markdown(hjust = 0.5, size = 12)
) +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
)) +
scale_y_log10(
breaks = c(
0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 2000
),
limits = c(10, 2000),
labels = c(
"", "10", "", "", "", "", "", "", "", "", "100", "", "", "", "", "", "", "", "", "1000", 2000
)
) +
annotation_logticks(
sides = "l",
outside = F,
short = unit(0.05, "cm"),
mid = unit(0.05, "cm"),
long = unit(0.2, "cm")
) +
coord_cartesian(clip = "off")
)Mapping speed for STAR, WASP and STAR+WASP - 32 threads
(threads32 <-
wall_clock_melted_converted %>% filter(Threads == "32 Threads") %>%
ggplot(aes(
x = factor(Sample, levels = order_df),
y = reads_per_hour / 1000000,
group = Run
)) +
geom_point(aes(
color = factor(Run),
shape = factor(Run),
fill = factor(Run)
), size = 3) +
scale_shape_manual(values = c(3, 17, 16)) +
scale_color_manual(values = global_colors) +
labs(y = "", x = "") +
theme_light(base_size = 12) + theme(legend.title = element_blank(), legend.position = "right") +
theme(strip.background = element_rect(
fill = "white", colour = "white"
)) +
theme(
strip.text = element_text(colour = 'black'),
strip.text.x = element_markdown(hjust = 0.5, size = 12)
) +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
)) +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
)) +
scale_y_log10(
breaks = c(
0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 2000
),
limits = c(10, 2000),
labels = c(
"", "10",
"", "", "", "", "", "", "", "", "100", "", "", "", "", "", "", "", "", "1000", 2000
)
) +
annotation_logticks(
sides = "l",
outside = F,
short = unit(0.05, "cm"),
mid = unit(0.05, "cm"),
long = unit(0.2, "cm")
) +
coord_cartesian(clip = "off")
)plot_grid(
threads8,
threads32,
nrow = 1,
ncol = 2,
labels = c("(a)", "(b)"),
rel_widths = c(0.68, 0.99),
hjust = 0.01,
align = 'hv'
) Distribution of Reads Overlapping Variants (/not)
## [1] "V1" "V2"
colnames(num_reads) <- c("Number_of_Reads", "Read_File_Path")
num_reads <-
cSplit(num_reads, "Read_File_Path", "/", direction = "wide")
num_reads <- num_reads[, -c(2:9, 11)]
num_reads$Read_File_Path_09 <-
as.character(num_reads$Read_File_Path_09)
num_reads$Read_File_Path_11 <-
as.character(num_reads$Read_File_Path_11)
colnames(num_reads) <- c("Number_of_Reads", "Sample", "Flag")
num_reads$Flag[num_reads$Flag == "WASP_Reads_Sorted_Unique"] <-
"Num_Reads"
num_reads$Flag[num_reads$Flag == "STAR_WASP_vW_Tagged_Reads_Unique"] <-
"vW_Tagged_Reads"
#Converting data frame to short format
num_reads_reshaped <-
reshape(num_reads,
idvar = "Sample",
timevar = "Flag",
direction = "wide")
colnames(num_reads_reshaped)[2] <- "Num_Reads"
colnames(num_reads_reshaped)[3] <- "vW_Tagged_Reads"
# Mutating frequencies
num_reads_reshaped <-
num_reads_reshaped %>% mutate("perc_tagged" = ((vW_Tagged_Reads / Num_Reads) *
100))
num_reads_reshaped <-
num_reads_reshaped %>% mutate("perc_untagged" = 100 - perc_tagged)
# Plotting distributions
num_reads_reshaped_tagged <-
as.data.frame(num_reads_reshaped[, c(1, 4)])
num_reads_reshaped_notags <-
as.data.frame(num_reads_reshaped[, c(1, 5)])
colnames(num_reads_reshaped_tagged)[2] <- "perc"
colnames(num_reads_reshaped_notags)[2] <- "perc"
num_reads_reshaped_tagged$Flag <- "vW_Tagged Reads"
num_reads_reshaped_notags$Flag <- "Reads with no Tag"
num_reads_reshaped <-
rbind(num_reads_reshaped_tagged, num_reads_reshaped_notags)
num_reads_reshaped$Flag <-
ordered(num_reads_reshaped$Flag ,
levels = c("vW_Tagged Reads", "Reads with no Tag"))
unique(num_reads_reshaped$Sample)## [1] "HG00731" "NA12878_Small"
## [3] "NA12878_Nucleus_PolyA_Rep" "HG00513"
## [5] "NA12878_RAMPAGE" "HG00512"
## [7] "NA12878_Nucleus_nonPolyA_Rep" "NA12878_RAMPAGE_Rep"
## [9] "NA12878_Nucleus_nonPolyA" "NA12878_Nucleus_PolyA"
## [11] "NA12878_Total" "NA19238"
## [13] "HG00733" "NA12878_PolyA"
## [15] "HG00732" "NA12878_PolyA_Rep"
## [17] "NA12878_Total_Rep" "NA19239"
## [19] "NA19240"
num_reads_reshaped <-
num_reads_reshaped %>% filter(
Sample != "NA12878_RAMPAGE_Rep" &
Sample != "NA12878_RAMPAGE" & Sample != "NA12878_Small"
)
## Cleaning up Sample names
unique(num_reads_reshaped$Sample)## [1] "HG00731" "NA12878_Nucleus_PolyA_Rep"
## [3] "HG00513" "HG00512"
## [5] "NA12878_Nucleus_nonPolyA_Rep" "NA12878_Nucleus_nonPolyA"
## [7] "NA12878_Nucleus_PolyA" "NA12878_Total"
## [9] "NA19238" "HG00733"
## [11] "NA12878_PolyA" "HG00732"
## [13] "NA12878_PolyA_Rep" "NA12878_Total_Rep"
## [15] "NA19239" "NA19240"
#Removing NA12878 from all NA12878 derived samples
num_reads_reshaped$Sample <-
gsub("NA12878_", "", num_reads_reshaped$Sample)
num_reads_reshaped$Sample <-
gsub("Rep", "R2", num_reads_reshaped$Sample)
num_reads_reshaped$Sample[num_reads_reshaped$Sample == "Total"] <-
"Total_R1"
num_reads_reshaped$Sample[num_reads_reshaped$Sample == "PolyA"] <-
"PolyA_R1"
num_reads_reshaped$Sample[num_reads_reshaped$Sample == "Nucleus_PolyA"] <-
"Nucleus_PolyA_R1"
num_reads_reshaped$Sample[num_reads_reshaped$Sample == "Nucleus_nonPolyA"] <-
"Nucleus_nonPolyA_R1"
# Manuscript Plot Read_Distribution
num_reads_reshaped_plot <- num_reads_reshaped
num_reads_reshaped_plot$Flag <-
as.character(num_reads_reshaped_plot$Flag)
num_reads_reshaped_plot$Flag[num_reads_reshaped_plot$Flag == "vW_Tagged Reads"] <-
"Reads Overlapping Variants"
num_reads_reshaped_plot$Flag[num_reads_reshaped_plot$Flag == "Reads with no Tag"] <-
"Reads not Overlapping Variants"
num_reads_reshaped_plot$Flag <-
as.factor(num_reads_reshaped_plot$Flag)
num_reads_reshaped_plot[order(num_reads_reshaped_plot$Flag, decreasing = F), ] %>%
ggplot() +
geom_bar(
aes(
x = factor(Sample, levels = order_df),
y = perc,
group = Sample,
fill = factor(
Flag,
levels = c("Reads Overlapping Variants", "Reads not Overlapping Variants")
)
),
stat = "identity",
width = 0.99,
alpha = 0.5
) +
geom_text(
aes(
x = Sample,
y = perc,
label = paste0(round(perc, 1), "%")
),
size = 3,
position = position_stack(vjust = 0.5)
) + theme_test(base_size = 12) +
scale_color_manual(values = c('gray10', "gray70")) +
scale_fill_manual(values = c('gray10', "gray70")) +
labs(y = "Read Distribution (%)", x = "") +
theme(legend.title = element_blank()) +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
)) +
scale_y_discrete(expand = c(0, 0.5)) + scale_x_discrete(expand = c(0, 0))Memmory 8, 16 and 32 Threads Respectively
benchmark_rez_melted_memory <- read.csv("benchmark_rez_memory.csv")
memory8threads <-
benchmark_rez_melted_memory %>% filter(Threads == "8 Threads") %>%
ggplot(aes(
x = factor(Sample, levels = order_df),
y = (Value) / 1000000,
group = Run
)) +
geom_point(aes(
color = factor(Run),
shape = factor(Run),
fill = factor(Run)
), size = 2.5) +
scale_shape_manual(values = c(3, 17, 16)) +
scale_color_manual(values = global_colors) +
labs(y = "Memory (Gigabytes)", x = "") +
theme_light(base_size = 12) + theme(legend.position = "none", legend.title = element_blank()) +
scale_y_continuous(limits = c(30, 40)) +
theme(strip.background = element_rect(fill = "white", colour = "white")) +
theme(
strip.text = element_text(colour = 'black'),
strip.text.x = element_markdown(hjust = 0.5, size = 12)
) +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
))
memory16threads <-
benchmark_rez_melted_memory %>% filter(Threads == "16 Threads") %>%
ggplot(aes(
x = factor(Sample, levels = order_df),
y = (Value) / 1000000,
group = Run
)) +
geom_point(aes(
color = factor(Run),
shape = factor(Run),
fill = factor(Run)
), size = 2.5) +
scale_shape_manual(values = c(3, 17, 16)) +
scale_color_manual(values = global_colors) +
labs(y = "", x = "") +
theme_light(base_size = 12) + theme(legend.title = element_blank()) +
scale_y_continuous(limits = c(30, 40)) +
theme(strip.background = element_rect(fill = "white", colour = "white")) +
theme(
strip.text = element_text(colour = 'black'),
strip.text.x = element_markdown(hjust = 0.5, size = 12),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
),
legend.position = "none"
)
memory32threads <-
benchmark_rez_melted_memory %>% filter(Threads == "32 Threads") %>%
ggplot(aes(
x = factor(Sample, levels = order_df),
y = (Value) / 1000000,
group = Run
)) +
geom_point(aes(
color = factor(Run),
shape = factor(Run),
fill = factor(Run)
), size = 2.5) +
scale_shape_manual(values = c(3, 17, 16)) +
scale_color_manual(values = global_colors) +
labs(y = "", x = "") +
theme_light(base_size = 12) + theme(legend.title = element_blank()) +
scale_y_continuous(limits = c(30, 40)) +
theme(strip.background = element_rect(fill = "white", colour = "white")) +
theme(
strip.text = element_text(colour = 'black'),
strip.text.x = element_markdown(hjust = 0.5, size = 12),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
))
plot_grid(
memory8threads,
memory16threads,
memory32threads,
nrow = 1,
ncol = 3,
labels = c("(a)", "(b)", "(c)"),
rel_widths = c(0.85, 0.77, 1.115),
hjust = 0.01,
align = 'v'
)Ref_Bias_Per_Sample
df_combined <- read.csv("df_combined.csv")
(pe <-
df_combined %>% filter(
state1 == "Alignment Passed WASP Filtering" |
state1 == "Alignment Failed WASP Filtering"
) %>%
ggplot(aes(
x = Sample,
y = diff,
group = Sample,
fill = state1
)) +
geom_col(alpha = 0.9) +
scale_y_continuous(limits = c(-50, 100)) +
theme_cowplot(font_size = 12) +
geom_point(color = "gray20", size = 1) +
scale_color_manual(
values = c(
"Alignment Passed WASP Filtering" = "skyblue2",
"Alignment Failed WASP Filtering" = "steelblue4"
)
) +
scale_fill_manual(
values = c(
"Alignment Passed WASP Filtering" = "skyblue2",
"Alignment Failed WASP Filtering" = "steelblue4"
)
) +
labs(
y = paste0("Reference Bias ", "\n", "(REF% - ALT%)"),
x = ""
) +
geom_vline(xintercept = 0) +
geom_hline(
yintercept = 0,
color = "gray40",
linetype = "dashed",
linewidth = 0.3
) +
theme(
legend.position = "right",
legend.text = element_text(size = 8) ,
legend.title = element_blank(),
axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.5,
size = 10
)
)
)plot_df <- read.csv("plot_df.csv")
(b <- plot_df %>%
filter(df_state == "vw_Tag1" | df_state == "vw_Tag_other") %>%
ggplot(aes(
x = reorder(pass_fail,-freq), freq,
fill = Bias_State
)) +
geom_bar(
stat = "identity",
width = 0.99,
alpha = 0.7
) +
geom_text(
aes(x = pass_fail,
y = freq,
label = freq),
size = 4,
position = position_stack(vjust = 0.5)
) +
scale_fill_manual(values = c("gray45", "gray65")) +
theme_light(base_size = 13) + ggtitle("") +
theme(legend.position = "right", legend.title = element_blank()) +
labs(x = "", y = "Percentage of Reads Aligning to REF/ALT") + scale_x_discrete(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + theme(plot.title = element_text(hjust = 0.5)) +
facet_wrap( ~ Sample) + theme(strip.background = element_rect(
fill = "white", colour = "white"
)) +
theme(
strip.text = element_text(colour = 'black'),
strip.text.x = element_markdown(hjust = 0.5)
) + theme(axis.text.x = element_text(
angle = 90,
hjust = 0.5,
vjust = 0.5,
size = 10
))
)